import pandas as pd
import missingno as msno

# Location of the semicolon-separated, UTF-8 encoded tree dataset.
CSV_PATH = r'C:\Users\imane\OneDrive\Desktop\Data4good\p2-arbres-fr.csv'

# Load the raw table, then draw missingno's per-column bar chart for it.
df = pd.read_csv(CSV_PATH, sep=";", encoding="utf-8")
msno.bar(df)
<AxesSubplot:>
# Columns discarded before the analysis.
UNUSED_COLUMNS = [
    "id",
    "complement_addresse",
    "numero",
    "id_emplacement",
    "espece",
    "variete",
    "remarquable",
    "type_emplacement",
    "libelle_francais",
]

# One call removes them all; `inplace=True` keeps mutating the same `df`
# object, as the individual drops did.
df.drop(columns=UNUSED_COLUMNS, inplace=True)
# Upper bounds applied to the raw measurements; rows above them are removed.
# NOTE(review): presumably outlier cut-offs chosen from the data - confirm.
MAX_CIRCONFERENCE_CM = 470
MAX_HAUTEUR_M = 35

circonference = df["circonference_cm"]
hauteur = df["hauteur_m"]

# A row is kept only when both measurements are present, non-zero and not
# above their bound, and the development stage is filled in. Comparisons
# against NaN evaluate to False, so `le` also rejects missing measurements.
keep = (
    circonference.ne(0)
    & circonference.le(MAX_CIRCONFERENCE_CM)
    & hauteur.ne(0)
    & hauteur.le(MAX_HAUTEUR_M)
    & df["stade_developpement"].notna()
)
df.drop(index=df.index[~keep], inplace=True)

# Scale heights from metres to centimetres, then expose the column under a
# name that matches the new unit. `df` itself keeps the old column name.
df["hauteur_m"] = df["hauteur_m"].mul(100)
new_df = df.rename(columns={"hauteur_m": "hauteur_cm"})
new_df
| | domanialite | arrondissement | lieu | genre | circonference_cm | hauteur_cm | stade_developpement | geo_point_2d_a | geo_point_2d_b |
|---|---|---|---|---|---|---|---|---|---|
| 1 | Jardin | PARIS 7E ARRDT | MAIRIE DU 7E 116 RUE DE GRENELLE PARIS 7E | Taxus | 65 | 800 | A | 48.857656 | 2.321031 |
| 2 | Jardin | PARIS 7E ARRDT | MAIRIE DU 7E 116 RUE DE GRENELLE PARIS 7E | Taxus | 90 | 1000 | A | 48.857705 | 2.321061 |
| 3 | Jardin | PARIS 7E ARRDT | MAIRIE DU 7E 116 RUE DE GRENELLE PARIS 7E | Acer | 60 | 800 | A | 48.857722 | 2.321006 |
| 8 | Jardin | PARIS 16E ARRDT | JARDIN DE L AVENUE FOCH / 10 AVENUE FOCH | Sophora | 145 | 1400 | A | 48.871990 | 2.275814 |
| 9 | Jardin | PARIS 16E ARRDT | JARDIN DE L AVENUE FOCH / 10 AVENUE FOCH | Sophora | 135 | 1000 | A | 48.872046 | 2.275752 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 198859 | DJS | PARIS 19E ARRDT | CENTRE SPORTIF JULES LADOUMEGUE / 35 ROUTE DES... | Populus | 20 | 500 | J | 48.890466 | 2.397443 |
| 198860 | Jardin | PARIS 14E ARRDT | PARC MONTSOURIS | Fagus | 55 | 700 | J | 48.823919 | 2.337872 |
| 198861 | Jardin | PARIS 14E ARRDT | PARC MONTSOURIS | Taxus | 55 | 500 | JA | 48.821099 | 2.338411 |
| 198862 | Jardin | PARIS 14E ARRDT | PARC MONTSOURIS | Taxus | 75 | 500 | JA | 48.823552 | 2.337892 |
| 198865 | DJS | PARIS 13E ARRDT | CENTRE SPORTIF GEORGES CARPENTIER / 81 BOULEVA... | Acer | 165 | 1100 | A | 48.819252 | 2.370641 |
129996 rows × 9 columns
# Box plot of the circonference_cm column after filtering.
new_df.boxplot(column="circonference_cm")
<AxesSubplot:>
# Box plot of the hauteur_cm column after filtering.
new_df.boxplot(column="hauteur_cm")
<AxesSubplot:>
# Same missingno bar chart as for the raw data, now on the cleaned frame.
msno.bar(new_df)
<AxesSubplot:>
import seaborn as sns
# Constant counter column: summing it within a group gives the number of rows
# (trees) in that group. The scalar is broadcast to every row, so there is no
# need to build a Python list as long as the frame.
new_df["n_tree"] = 1
# Short display labels for the values of the "arrondissement" column.
ARRONDISSEMENT_LABELS = {
    "PARIS 1ER ARRDT": "1ER ARR",
    "PARIS 2E ARRDT": "2E ARR",
    "PARIS 3E ARRDT": "3E ARR",
    "PARIS 4E ARRDT": "4E ARR",
    "PARIS 5E ARRDT": "5E ARR",
    "PARIS 6E ARRDT": "6E ARR",
    "PARIS 7E ARRDT": "7E ARR",
    "PARIS 8E ARRDT": "8E ARR",
    "PARIS 9E ARRDT": "9E ARR",
    "PARIS 10E ARRDT": "10E ARR",
    "PARIS 11E ARRDT": "11E ARR",
    "PARIS 12E ARRDT": "12E ARR",
    "PARIS 13E ARRDT": "13E ARR",
    "PARIS 14E ARRDT": "14E ARR",
    "PARIS 15E ARRDT": "15E ARR",
    "PARIS 16E ARRDT": "16E ARR",
    "PARIS 17E ARRDT": "17E ARR",
    "PARIS 18E ARRDT": "18E ARR",
    "PARIS 19E ARRDT": "19E ARR",
    "PARIS 20E ARRDT": "20E ARR",
    "SEINE-SAINT-DENIS": "S.S.DENIS",
    "VAL-DE-MARNE": "V.MARNE",
    "BOIS DE BOULOGNE": "B. BOULOGNE",
    "BOIS DE VINCENNES": "B.VINCENNES",
    "HAUTS-DE-SEINE": "H.SEINE",
}

# Assign the result back to the column. Calling `replace(..., inplace=True)`
# on `new_df["arrondissement"]` mutates a temporary selection, which emits a
# chained-assignment warning on recent pandas and leaves `new_df` untouched
# when Copy-on-Write is enabled.
new_df["arrondissement"] = new_df["arrondissement"].replace(ARRONDISSEMENT_LABELS)
# Number of trees per arrondissement.
# Only the counter column is summed. Summing every column and dropping the
# unwanted numeric ones afterwards depends on text columns being silently
# discarded, which pandas >= 2.0 no longer does (it tries to "sum" them).
new_df_2 = new_df.groupby("arrondissement")[["n_tree"]].sum()

# Long format expected by seaborn: one row per (arrondissement, variable).
for_sns = pd.melt(
    new_df_2.reset_index(),
    id_vars=["arrondissement"],
    value_vars=["n_tree"],
)
p = sns.barplot(y="arrondissement", x="value", data=for_sns, hue="variable")
p.set_title(" Nombre d'arbres par arroundissement")
Text(0.5, 1.0, " Nombre d'arbres par arroundissement")
# Number of trees per genre, drawn as a bar chart.
genre = new_df.loc[:, ["n_tree", "genre"]]
df_map = genre.copy()
# Grouping with as_index=False yields the same flat (genre, n_tree) table as
# summing and then resetting the index.
n_df = df_map.groupby("genre", as_index=False).sum()
sns.barplot(x="genre", y="n_tree", data=n_df)
<AxesSubplot:xlabel='genre', ylabel='n_tree'>
# Mean circumference and mean height per arrondissement, one point each.
mean_columns = ["circonference_cm", "hauteur_cm"]
selected_col = new_df[["arrondissement", *mean_columns]]
new_df_3 = selected_col.copy()
ndf_3 = new_df_3.groupby("arrondissement").mean()
for_sns_2 = ndf_3.reset_index()
g = sns.scatterplot(
    x="circonference_cm",
    y="hauteur_cm",
    hue="arrondissement",
    data=for_sns_2,
)
g.set_title("hauteur_cm et circonference_cm moyenne par arrondissement")
Text(0.5, 1.0, 'hauteur_cm et circonference_cm moyenne par arrondissement')
import matplotlib.pyplot as plt

# Mean height and mean circumference per development stage.
selected_col_2 = new_df.loc[:, ["stade_developpement", "hauteur_cm", "circonference_cm"]]
new_df_4 = selected_col_2.copy()
ndf_4 = new_df_4.groupby("stade_developpement").mean()
for_sns_3 = ndf_4.reset_index()

# Both curves are drawn on the current axes created by plt.subplots();
# circumference first, then height.
fig, ax = plt.subplots()
ax, ax1 = [
    sns.lineplot(data=for_sns_3, x="stade_developpement", y=measure)
    for measure in ("circonference_cm", "hauteur_cm")
]
ax.set_title("Hauteur_cm et circonference_cm moyenne par stade de developpement")
Text(0.5, 1.0, 'Hauteur_cm et circonference_cm moyenne par stade de developpement')
# Tree count per (arrondissement, development stage), shown as stacked bars.
selected_col_3 = new_df.loc[:, ["stade_developpement", "n_tree", "arrondissement"]]
new_df_5 = selected_col_3.copy()
ndf_5 = new_df_5.groupby(
    ["arrondissement", "stade_developpement"], as_index=False
).sum()
# The pre-aggregated counts are passed as weights so that each bar segment's
# height equals the number of trees.
graph = sns.histplot(
    data=ndf_5,
    x="arrondissement",
    weights="n_tree",
    hue="stade_developpement",
    multiple="stack",
)
graph.set_title("Nombre d'arbres par arrondissement et stade de developpement")
Text(0.5, 1.0, "Nombre d'arbres par arrondissement et stade de developpement")
import plotly.express as px

# Tree count per (domanialite, arrondissement), rendered as a two-level
# treemap with domanialite as the outer level.
selected_col_4 = new_df.loc[:, ["n_tree", "domanialite", "arrondissement"]]
new_df_6 = selected_col_4.copy()
ndf_6 = new_df_6.groupby(["domanialite", "arrondissement"], as_index=False).sum()
figure = px.treemap(
    ndf_6,
    path=["domanialite", "arrondissement"],
    values="n_tree",
)
figure.show()
import folium

# One circle per arrondissement, sized by its tree count and placed at the
# coordinates of the first tree listed for that arrondissement.
selected_col_5 = new_df[["arrondissement", "n_tree", "geo_point_2d_a", "geo_point_2d_b"]]
new_df_7 = selected_col_5.copy()

tree_counts = new_df_7.groupby("arrondissement", as_index=False)["n_tree"].sum()
first_positions = new_df_7.drop_duplicates(subset="arrondissement")[
    ["arrondissement", "geo_point_2d_a", "geo_point_2d_b"]
]
# Merge on the arrondissement key rather than joining two independently built
# frames by row position: with pandas >= 2.0 `GroupBy.nth` keeps the original
# row order, so a positional join would attach coordinates to the wrong
# arrondissement.
ndf_7 = tree_counts.merge(first_positions, on="arrondissement")

# NOTE(review): `map` shadows the builtin of the same name; the name is kept
# in case other cells refer to it.
# NOTE(review): recent folium releases appear to no longer bundle the
# "Stamen Terrain" tiles - confirm against the installed version.
map = folium.Map(
    location=[48.856614, 2.3522219],
    zoom_start=14,
    control_scale=True,
    tiles="Stamen Terrain",
)
n_groups = len(ndf_7)
for row in ndf_7.itertuples(index=False):
    folium.Circle(
        location=[row.geo_point_2d_a, row.geo_point_2d_b],
        # A plain string reads better than the repr of a tuple.
        tooltip=f"{row.arrondissement}: {row.n_tree}",
        # Radius is the tree count divided by the number of arrondissements.
        radius=int(row.n_tree) / n_groups,
        fill=True,
    ).add_to(map)
map
# Every tree as a point on an OpenStreetMap background, coloured by
# arrondissement; hovering shows the arrondissement and the domanialite.
selected_col_6 = new_df.loc[
    :, ["arrondissement", "geo_point_2d_a", "geo_point_2d_b", "domanialite"]
]
new_df_8 = selected_col_6.copy()
scatter_map = px.scatter_mapbox(
    new_df_8,
    lat="geo_point_2d_a",
    lon="geo_point_2d_b",
    color="arrondissement",
    hover_name="arrondissement",
    hover_data=["domanialite"],
    zoom=14,
    height=500,
)
scatter_map.update_layout(mapbox_style="open-street-map")